
use "${hmrc_datadir}\trade_ct600_fy.dta" , clear

keep identifier turnover_ct600 trading_losses_case_i_r year fixed_assets ///
gross_profit no_employees total_assets sic_primary marginal_tax corp_tax ///
traderid trading_profit_r comcode flow country svalue netmass suppunit ///
calendar_year 

keep if flow=="D" | flow=="E"

collapse (sum) netmass svalue suppunit, by(identifier country ///
trading_losses_case_i_r trading_profit_r year turnover_ct600 gross_profit ///
fixed_assets no_employees total_assets sic_primary marginal_tax corp_tax ///
comcode flow)


merge m:1 country using  "${hmrc_datadir}\Trade\iso_table.dta"
drop _merge
merge m:1 alpha3code year using "${hmrc_datadir}\CBT\cbt_ready"
drop _merge
drop if tax==.
bysort identifier year: gen drop0=turnover==.
bysort identifier year: egen drop1=max(drop)
tab drop1
drop if drop1==1
drop drop1 drop0

save "${hmrc_datadir}\export_short_fy", replace

******merge the ownership type with export_short
merge m:1 identifier using  "${hmrc_datadir}\ownership2\duplicatestypedropped"
keep if _merge==3
drop _merge
save "${hmrc_datadir}\export_short_type_fy", replace


*****************merge the country firm relation with export****
use "${hmrc_datadir}\ownership2\mergedctry.dta", clear
drop type
duplicates drop
drop if alpha3code=="GBR"
merge 1:m identifier alpha3code using "${hmrc_datadir}\export_short_type_fy"
drop alpha2code country numeric
drop if _merge==1
gen related=(_merge==3)
gen mnc=(type=="foreign_subs"|type=="mnc_uksubs"|type=="uk_mnc_parent")
drop _merge
save "${hmrc_datadir}\export_short_type_related_fy", replace

use "${hmrc_datadir}\export_short_type_related_fy", clear

collapse (sum) netmass svalue suppunit, by(identifier alpha3code type ///
trading_losses_case_i_r trading_profit_r year turnover_ct600 gross_profit ///
fixed_assets no_employees total_assets sic_primary marginal_tax corp_tax ///
comcode flow uk_tax tax Population dgdp export gdp gdppc import u ///
estimation_governance stable_FI stable_FM  wb related mnc)

duplicates report identifier year comcode flow alpha3code


bysort identifier comcode flow alpha3code: gen n=_n
bysort n: gen id=_n if n==1
drop n
sort identifier comcode flow alpha3code id
bysort identifier comcode alpha3code: replace id=id[_n-1] if id==.


*Outcome variables
gen lnp_weight=log(svalue/netmass)
gen lnp_unit=log(svalue/suppunit)

*firm-level characteristics
gen lnemploy=log(no_employees)
gen lnsale=log(turnover_ct600)
gen lnk=log(total_assets)

*destination country characteristics
gen lnppl=log(Population)
gen lngdppc=log(gdppc)

*tax differential

qui encode alpha3code, gen(ctry)

gen low_tax_country=(tax<uk_tax)
gen high_tax_country=(tax>=uk_tax)
gen I_low_wedge=abs(tax-uk_tax)*low_tax_country
gen I_high_wedge=abs(tax-uk_tax)*high_tax_country

gen I_low_wedge_firm=abs(tax-marginal_tax)*low_tax_country
gen I_high_wedge_firm=abs(tax-marginal_tax)*high_tax_country

gen r_lowwedge=related*I_low_wedge
gen r_highwedge=related*I_high_wedge

gen r_lowwedge_firm=related*I_low_wedge_firm
gen r_highwedge_firm=related*I_high_wedge_firm

gen mnc_lowwedge=mnc*I_low_wedge
gen mnc_highwedge=mnc*I_high_wedge

gen mnc_lowwedge_firm=mnc*I_low_wedge_firm
gen mnc_highwedge_firm=mnc*I_high_wedge_firm

*industry code
gen sic=int(sic_primary/100)
gen industry="agriculture" if sic<5&sic>0
replace industry="mining" if sic<10&sic>4
replace industry="manufacturing" if sic<35&sic>9
replace industry="electricity & gas" if sic<36&sic>34
replace industry="water, waste, sewerage" if sic>35&sic<40
replace industry="construction" if sic>40&sic<45
replace industry="retail & wholesale trade" if sic>44&sic<49
replace industry="transportation" if sic>48&sic<54
replace industry="accommodation, restaurant services" if sic>54&sic<58
replace industry="information & communication" if sic>57&sic<64
replace industry="banking, finance, insurance, real estate" if sic>63&sic<69
replace industry="services" if sic>68&sic<83
replace industry="Public administration" if sic>83&sic<85
replace industry="education" if sic>84&sic<86
replace industry="health & social work" if sic>85&sic<90
replace industry="art, liberary, museum " if sic>89&sic<92
replace industry="sport, club, recreation and amusement" if sic>91&sic<94
replace industry="organisations, union, religious group " if sic>93&sic<95
replace industry="personal services" if sic>94&sic<97
replace industry="households" if sic>96&sic<99
replace industry="extraterritorital bodies" if sic==99

*drop public utility
drop if industry=="extraterritorial bodies" | industry=="households" | industry=="organisations, union, religious group " ///
| industry=="electricity & gas" | industry=="water, waste, sewerage" | ///
industry=="Public administration" | industry=="education" | industry=="health & social work" ///
| industry=="art, liberary, museum " | industry=="sport, club, recreation and amusement" ///
|industry=="personal services" 

encode industry, gen (ind_code)

*add exchange rate
merge m:1 year alpha3code using "${hmrc_datadir}\CBT\Exchange_rate"
drop if _merge==2
drop _merge

*add tax haven indicator
merge m:1 alpha3code using "${hmrc_datadir}\CBT\tax havens_iso.dta"
drop if _merge==2
drop alpha2code englishshortname
gen tax_haven=_merge==3
drop _merge

*add differentiated good indicator
*sum comcode
gen com8=comcode
replace com8=comcode/1000000 if comcode>10000000000
merge m:1 com8 using "${hmrc_datadir}\Trade\differentiated.dta"
drop if _merge==2
drop com8 _merge


*drop switzerland
drop if alpha3code=="CHE"

*merge with the distance and language and ex-colonial
merge m:1 alpha3code using "${hmrc_datadir}\CBT\CEPII.dta"
drop if _merge==2
drop _merge

*merge with the new governance indicator
merge m:1 year alpha3code using "${hmrc_datadir}\CBT\gov_ind_new"
drop if _merge==2
drop _merge
drop estimation_governance
rename est1 estimation_governance

save "${hmrc_datadir}\export_reg_fy",replace


drop if year<2005

* creat an indicator for firm-comcodity-year with less than 5 destination countries
bysort identifier comcode year: gen x=1 if low_tax_country==1
bysort identifier comcode year: egen n=total(x) if low_tax_country==1

gen less_5_ctry=(n<5) 
bysort identifier comcode year: egen drop=max(less_5_ctry)

tab drop
*unique identifier if drop==1
*unique identifier if drop==0
*unique identifier comcode year if drop==1
*unique identifier comcode year if drop==0
drop if drop==1
drop x n drop less_5_ctry

* creat an indicator for countries with tax rate in the lowest quintile of each firm-comodity-year
sort low_tax_country identifier comcode year tax

bysort identifier comcode year low_tax_country: gen n=_n
bysort identifier comcode year low_tax_country: gen N=_N

gen n1=n-1
gen P=.
gen p20=.
gen p40=.
gen p60=.
gen p80=.

foreach p in 20 40 60 80{
replace P=N*`p'/100 
replace p`p'=n if n>P&n1<P 
replace p`p'=(2*n-1)/2 if n1==P 
sort identifier comcode year p`p' 
bysort  identifier comcode year: replace p`p'=p`p'[_n-1] if p`p'==.
}
gen q=1 if n<=p20 & low_tax_country==1

replace q=2 if n>p20 & n<=p40 & low_tax_country==1

replace q=3 if n>p40 & n<=p60 & low_tax_country==1

replace q=4 if n>p60 & n<=p80 & low_tax_country==1

replace q=5 if n>p80 & low_tax_country==1

*tab q
*tab low_tax_country if q==.

rename q quintile
drop p* P n n1 N

* creat an indicator for lowest five rates of countries of each firm-comodity-year
gen low5=0 
forv i=1/5 {
bysort low_tax_country identifier comcode year: egen tax_min=min(tax) if low5==0
replace low5=1 if tax==tax_min & low_tax_country==1
drop tax_min
}

*tab low5 if low_tax_country==1
*tab low5 if low_tax_country==0

*time-invariant trade to all destination country for each commodity-firm
bysort identifier comcode: egen total_trade=total(svalue) if year<2009
sort identifier comcode year
bysort identifier comcode: replace total_trade=total_trade[_n-1] if total_trade==.

*time-invariant trade to each destination country for each commodity-firm
bysort identifier comcode alpha3code: egen ctry_trade=total(svalue) if year<2009
bysort identifier comcode alpha3code: replace ctry_trade=ctry_trade[_n-1] if ctry_trade==.

gen share_trade=ctry_trade/total_trade
*sum share, d
gen bigshare=(share>=0.01)

gen low5_bigshare=0 
forv i=1/5 {
bysort identifier comcode year: egen tax_min=min(tax) if low5_bigshare==0 & bigshare==1
replace low5_bigshare=1 if tax==tax_min & bigshare==1 & low_tax_country==1
qui drop tax_min
}

tab low5_bigshare if low_tax_country==1
tab low5_bigshare if low_tax_country==0

*time-invariant trade to each destination country for each commodity-firm

sum total_trade, d
gen bigvolume=(total_trade>=r(p10))

gen low5_bigvolume=0
forv i=1/5 {
bysort identifier comcode year: egen tax_min=min(tax) if low5_bigvolume==0 & bigvolume==1
replace low5_bigvolume=1 if tax==tax_min & bigvolume==1 & low_tax_country==1
qui drop tax_min
}

*tab low5_bigvolume if low_tax_country==1
*tab low5_bigvolume if low_tax_country==0

keep id year quintile low5 low5_*
drop if id==.
qui save "${hmrc_datadir}\export_reg_fy_indicator", replace

foreach x in export {
qui use "${hmrc_datadir}\\`x'_reg_fy", clear
drop if identifier==.
merge 1:1 id year using "${hmrc_datadir}\\`x'_reg_fy_indicator"
drop if year<2005
*tab _merge
drop _merge

gen insample_ukmnc= (mnc==1 & type!="foreign_subs")
gen insample_foreignmnc= (mnc==1 & type=="foreign_subs")
gen insample_mnc= (mnc==1)

gen profit_making=trading_profit_r>0

gen post2009=year>=2009
gen r_lowwedge_pi_post2009=r_lowwedge*profit_making*post2009
gen r_highwedge_pi_post2009=r_highwedge*profit_making*post2009

gen tax_wedge=uk_tax-tax
gen tax_wedge_sqr=tax_wedge^2

gen r_wedge=related*tax_wedge
gen r_wedge_pi=r_wedge*profit_making
gen r_wedge_pi_post2009=r_wedge_pi*post2009

gen r_lowwedge_sqr=related*tax_wedge_sqr*low_tax_country
gen r_highwedge_sqr=related*tax_wedge_sqr*high_tax_country

gen r_lowwedge_pi=r_lowwedge*profit_making
gen r_highwedge_pi=r_highwedge*profit_making
gen r_lowwedge_pi_sqr=r_lowwedge_sqr*profit_making
gen r_highwedge_pi_sqr=r_highwedge_sqr*profit_making

gen r_lowwedge_pi_post2009_sqr=r_lowwedge_pi_sqr*post2009
gen r_highwedge_pi_post2009_sqr=r_highwedge_pi_sqr*post2009

tab quintile, gen(q)
forv i=1/5{
replace q`i'=0 if q`i'==.
}

gen q1_wedge=q1*tax_wedge
gen q1_r_wedge=q1*r_wedge

gen q1_r_wedge_pi=q1*r_lowwedge*profit_making
gen q1_r_wedge_pi_post2009=q1*r_lowwedge*profit_making*post2009

gen l5_wedge=low5*tax_wedge
gen l5_r_wedge=low5*r_wedge
gen l5_r_wedge_pi=low5*r_wedge_pi
gen l5_r_wedge_pi_post2009=low5*r_wedge_pi_post2009

gen l5bs_wedge=low5_bigshare*tax_wedge
gen l5bs_r_wedge=low5_bigshare*r_wedge
gen l5bs_r_wedge_pi=low5_bigshare*r_wedge_pi
gen l5bs_r_wedge_pi_post2009=low5_bigshare*r_wedge_pi_post2009

gen l5bv_wedge=low5_bigvolume*tax_wedge
gen l5bv_r_wedge=low5_bigvolume*r_wedge
gen l5bv_r_wedge_pi=low5_bigvolume*r_wedge_pi
gen l5bv_r_wedge_pi_post2009=low5_bigvolume*r_wedge_pi_post2009

gen tax_wedge_firm=marginal_tax-tax
gen r_wedge_firm=tax_wedge_firm*related

gen tax_wedge_firm_sqr=tax_wedge_firm^2
gen r_wedge_firm_sqr=r_wedge_firm^2

gen q1_wedge_firm=q1*tax_wedge_firm
gen q1_r_wedge_firm=q1*r_wedge_firm

gen l5_wedge_firm=low5*tax_wedge_firm
gen l5_r_wedge_firm=low5*r_wedge_firm
gen l5bs_wedge_firm=low5_bigshare*tax_wedge_firm
gen l5bs_r_wedge_firm=low5_bigshare*r_wedge_firm
gen l5bv_wedge_firm=low5_bigvolume*tax_wedge_firm
gen l5bv_r_wedge_firm=low5_bigvolume*r_wedge_firm

qui save "${hmrc_datadir}\\`x'_ctrygroup_reg_fy", replace
}


